{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# SELECT from Nobel\n",
    "\n",
    "## `nobel` Nobel Laureates\n",
    "\n",
    "We continue practicing simple SQL queries on a single table.\n",
    "\n",
    "This tutorial is concerned with a table of Nobel prize winners:\n",
    "\n",
    "```\n",
    "nobel(yr, subject, winner)\n",
    "```\n",
    "\n",
    "Using the `SELECT` statement."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading spark-stubs, spark-hive\n",
      "Adding Hive conf dir /opt/hive/conf to classpath\n",
      "Creating SparkSession\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "SLF4J: No SLF4J providers were found.\n",
      "SLF4J: Defaulting to no-operation (NOP) logger implementation\n",
      "SLF4J: See https://www.slf4j.org/codes.html#noProviders for further details.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<a target=\"_blank\" href=\"http://jupyter:4041\">Spark UI</a>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$                                  \n",
       "\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.log4j.{Level, Logger}\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark._\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql._\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.functions._\n",
       "\n",
       "\u001b[39m\n",
       "\u001b[36mspark\u001b[39m: \u001b[32mSparkSession\u001b[39m = org.apache.spark.sql.SparkSession@16562d5a"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import $ivy.`org.apache.spark::spark-sql:3.4.0`\n",
    "\n",
    "import org.apache.log4j.{Level, Logger}\n",
    "Logger.getLogger(\"org\").setLevel(Level.OFF)\n",
    "\n",
    "import org.apache.spark._\n",
    "import org.apache.spark.sql._\n",
    "import org.apache.spark.sql.functions._\n",
    "\n",
    "val spark = {\n",
    "    NotebookSparkSession.builder()\n",
    "    .progress(false)\n",
    "    .appName(\"app03\")\n",
    "    // .master(\"spark://192.168.31.31:7077\")\n",
    "    .master(\"local[*]\")\n",
    "    .config(\"spark.sql.warehouse.dir\", \n",
    "            \"hdfs://192.168.31.31:9000/user/hive/warehouse\") \n",
    "    .config(\"spark.cores.max\", \"4\") \n",
    "    .config(\"spark.executor.instances\", \"1\") \n",
    "    .config(\"spark.executor.cores\", \"2\") \n",
    "    .config(\"spark.executor.memory\", \"10g\") \n",
    "    .config(\"spark.shuffle.service.enabled\", \"false\") \n",
    "    .config(\"spark.dynamicAllocation.enabled\", \"false\") \n",
    "    .config(\"spark.sql.catalogImplementation\", \"hive\")\n",
    "    .config(\"spark.sql.repl.eagerEval.enabled\", \"true\")\n",
    "    .config(\"spark.driver.allowMultipleContexts\", \"true\")\n",
    "    .getOrCreate()\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[32mimport \u001b[39m\u001b[36mspark.implicits._\n",
       "\u001b[39m\n",
       "defined \u001b[32mfunction\u001b[39m \u001b[36msc\u001b[39m\n",
       "\u001b[36mhiveCxt\u001b[39m: \u001b[32msql\u001b[39m.\u001b[32mhive\u001b[39m.\u001b[32mHiveContext\u001b[39m = org.apache.spark.sql.hive.HiveContext@41c1703f"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import spark.implicits._\n",
    "def sc = spark.sparkContext\n",
    "val hiveCxt = new org.apache.spark.sql.hive.HiveContext(sc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "defined \u001b[32mclass\u001b[39m \u001b[36mRichDF\u001b[39m"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "// Credit to Aivean\n",
    "implicit class RichDF(val ds:DataFrame) {\n",
    "    def showHTML(limit: Int = 50, truncate: Int = 100) = {\n",
    "        import xml.Utility.escape\n",
    "        val data = ds.take(limit)\n",
    "        val header = ds.schema.fieldNames.toSeq        \n",
    "        val rows: Seq[Seq[String]] = data.map { row =>\n",
    "          row.toSeq.map {cell =>\n",
    "            val str = cell match {\n",
    "              case null => \"null\"\n",
    "              case binary: Array[Byte] => binary.map(\"%02X\".format(_)).mkString(\"[\", \" \", \"]\")\n",
    "              case array: Array[_] => array.mkString(\"[\", \", \", \"]\")\n",
    "              case seq: Seq[_] => seq.mkString(\"[\", \", \", \"]\")\n",
    "              case _ => cell.toString\n",
    "            }\n",
    "            if (truncate > 0 && str.length > truncate) {\n",
    "              // do not show ellipses for strings shorter than 4 characters.\n",
    "              if (truncate < 4) str.substring(0, truncate)\n",
    "              else str.substring(0, truncate - 3) + \"...\"\n",
    "            } else {\n",
    "              str\n",
    "            }\n",
    "          }: Seq[String]\n",
    "        }\n",
    "    publish.html(s\"\"\" <table>\n",
    "                <tr>\n",
    "                 ${header.map(h => s\"<th>${escape(h)}</th>\").mkString}\n",
    "                </tr>\n",
    "                ${rows.map {row =>\n",
    "                  s\"<tr>${row.map{c => s\"<td>${escape(c)}</td>\" }.mkString}</tr>\"\n",
    "                }.mkString}\n",
    "            </table>\n",
    "        \"\"\")\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[36mnobel\u001b[39m: \u001b[32mDataFrame\u001b[39m = [yr: int, subject: string ... 1 more field]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "val nobel = hiveCxt.table(\"sqlzoo.nobel\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Winners from 1950\n",
    "\n",
    "Change the query shown so that it displays Nobel prizes for 1950."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>yr</th><th>subject</th><th>winner</th>\n",
       "                </tr>\n",
       "                <tr><td>1950</td><td>Chemistry</td><td>Kurt Alder</td></tr><tr><td>1950</td><td>Chemistry</td><td>Otto Diels</td></tr><tr><td>1950</td><td>Literature</td><td>Bertrand Russell</td></tr><tr><td>1950</td><td>Medicine</td><td>Philip S. Hench</td></tr><tr><td>1950</td><td>Medicine</td><td>Edward C. Kendall</td></tr><tr><td>1950</td><td>Medicine</td><td>Tadeus Reichstein</td></tr><tr><td>1950</td><td>Peace</td><td>Ralph Bunche</td></tr><tr><td>1950</td><td>Physics</td><td>Cecil Powell</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "nobel.filter($\"yr\"===1950).showHTML()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. 1962 Literature\n",
    "\n",
    "Show who won the 1962 prize for Literature."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>winner</th>\n",
       "                </tr>\n",
       "                <tr><td>John Steinbeck</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(nobel.filter(($\"yr\"===1962) && ($\"subject\"===\"Literature\"))\n",
    "     .select($\"winner\")\n",
    "     .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Albert Einstein\n",
    "\n",
    "Show the year and subject that won 'Albert Einstein' his prize."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>yr</th><th>subject</th>\n",
       "                </tr>\n",
       "                <tr><td>1921</td><td>Physics</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(nobel.filter($\"winner\"===\"Albert Einstein\")\n",
    "    .select($\"yr\", $\"subject\").showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Recent Peace Prizes\n",
    "\n",
    "Give the name of the 'Peace' winners since the year 2000, including 2000."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>winner</th>\n",
       "                </tr>\n",
       "                <tr><td>Tunisian National Dialogue Quartet</td></tr><tr><td>Kailash Satyarthi</td></tr><tr><td>Malala Yousafzai</td></tr><tr><td>European Union</td></tr><tr><td>Ellen Johnson Sirleaf</td></tr><tr><td>Leymah Gbowee</td></tr><tr><td>Tawakel Karman</td></tr><tr><td>Liu Xiaobo</td></tr><tr><td>Barack Obama</td></tr><tr><td>Martti Ahtisaari</td></tr><tr><td>Intergovernmental Panel on Climate Change</td></tr><tr><td>Al Gore</td></tr><tr><td>Grameen Bank</td></tr><tr><td>Muhammad Yunus</td></tr><tr><td>International Atomic Energy Agency</td></tr><tr><td>Mohamed ElBaradei</td></tr><tr><td>Wangari Maathai</td></tr><tr><td>Shirin Ebadi</td></tr><tr><td>Jimmy Carter</td></tr><tr><td>United Nations</td></tr><tr><td>Kofi Annan</td></tr><tr><td>Kim Dae-jung</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(nobel.filter(($\"yr\">=2000) && ($\"subject\"===\"Peace\"))\n",
    "     .select($\"winner\")\n",
    "     .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Literature in the 1980's\n",
    "\n",
    "Show all details **(yr, subject, winner)** of the Literature prize winners for 1980 to 1989 inclusive."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>yr</th><th>subject</th><th>winner</th>\n",
       "                </tr>\n",
       "                <tr><td>1989</td><td>Literature</td><td>Camilo José Cela</td></tr><tr><td>1988</td><td>Literature</td><td>Naguib Mahfouz</td></tr><tr><td>1987</td><td>Literature</td><td>Joseph Brodsky</td></tr><tr><td>1986</td><td>Literature</td><td>Wole Soyinka</td></tr><tr><td>1985</td><td>Literature</td><td>Claude Simon</td></tr><tr><td>1984</td><td>Literature</td><td>Jaroslav Seifert</td></tr><tr><td>1983</td><td>Literature</td><td>William Golding</td></tr><tr><td>1982</td><td>Literature</td><td>Gabriel García Márquez</td></tr><tr><td>1981</td><td>Literature</td><td>Elias Canetti</td></tr><tr><td>1980</td><td>Literature</td><td>Czeslaw Milosz</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(nobel.filter(($\"yr\".between(1980, 1989)) && \n",
    "              ($\"subject\"===\"Literature\"))\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Only Presidents\n",
    "\n",
    "Show all details of the presidential winners:\n",
    "\n",
    "- Theodore Roosevelt\n",
    "- Woodrow Wilson\n",
    "- Jimmy Carter\n",
    "- Barack Obama"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>yr</th><th>subject</th><th>winner</th>\n",
       "                </tr>\n",
       "                <tr><td>2009</td><td>Peace</td><td>Barack Obama</td></tr><tr><td>2002</td><td>Peace</td><td>Jimmy Carter</td></tr><tr><td>1919</td><td>Peace</td><td>Woodrow Wilson</td></tr><tr><td>1906</td><td>Peace</td><td>Theodore Roosevelt</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "nobel.filter($\"winner\".isin(List(\n",
    "    \"Theodore Roosevelt\", \"Woodrow Wilson\", \"Jimmy Carter\", \n",
    "    \"Barack Obama\"): _*)).showHTML()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. John\n",
    "\n",
    "Show the winners with first name John"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>winner</th>\n",
       "                </tr>\n",
       "                <tr><td>John O'Keefe</td></tr><tr><td>John B. Gurdon</td></tr><tr><td>John C. Mather</td></tr><tr><td>John L. Hall</td></tr><tr><td>John B. Fenn</td></tr><tr><td>John E. Sulston</td></tr><tr><td>John Pople</td></tr><tr><td>John Hume</td></tr><tr><td>John E. Walker</td></tr><tr><td>John C. Harsanyi</td></tr><tr><td>John F. Nash Jr.</td></tr><tr><td>John C. Polanyi</td></tr><tr><td>John R. Vane</td></tr><tr><td>John H. van Vleck</td></tr><tr><td>John Cornforth</td></tr><tr><td>John R. Hicks</td></tr><tr><td>John Bardeen</td></tr><tr><td>John C. Kendrew</td></tr><tr><td>John Steinbeck</td></tr><tr><td>John Bardeen</td></tr><tr><td>John F. Enders</td></tr><tr><td>John Cockcroft</td></tr><tr><td>John H. Northrop</td></tr><tr><td>John R. Mott</td></tr><tr><td>John Galsworthy</td></tr><tr><td>John Macleod</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(nobel.filter($\"winner\".startsWith(\"John\"))\n",
    " .select($\"winner\").showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Chemistry and Physics from different years\n",
    "\n",
    "**Show the year, subject, and name of Physics winners for 1980 together with the Chemistry winners for 1984.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>yr</th><th>subject</th><th>winner</th>\n",
       "                </tr>\n",
       "                <tr><td>1984</td><td>Chemistry</td><td>Bruce Merrifield</td></tr><tr><td>1980</td><td>Physics</td><td>James Cronin</td></tr><tr><td>1980</td><td>Physics</td><td>Val Fitch</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(nobel.filter((($\"subject\"===\"Physics\") && \n",
    "               ($\"yr\"===1980)) ||\n",
    "              (($\"subject\"===\"Chemistry\") &&\n",
    "               ($\"yr\"===1984)))\n",
    "     .select($\"yr\", $\"subject\", $\"winner\")\n",
    "     .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 9. Exclude Chemists and Medics\n",
    "\n",
    "**Show the year, subject, and name of winners for 1980 excluding Chemistry and Medicine**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>yr</th><th>subject</th><th>winner</th>\n",
       "                </tr>\n",
       "                <tr><td>1980</td><td>Economics</td><td>Lawrence R. Klein</td></tr><tr><td>1980</td><td>Literature</td><td>Czeslaw Milosz</td></tr><tr><td>1980</td><td>Peace</td><td>Adolfo Pérez Esquivel</td></tr><tr><td>1980</td><td>Physics</td><td>James Cronin</td></tr><tr><td>1980</td><td>Physics</td><td>Val Fitch</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(nobel.filter(($\"yr\"===1980) && \n",
    "              ! ($\"subject\".isin(List(\"Chemistry\", \"Medicine\"): _*)))\n",
    "     .select($\"yr\", $\"subject\", $\"winner\").showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 10. Early Medicine, Late Literature\n",
    "\n",
    "Show year, subject, and name of people who won a 'Medicine' prize in an early year (before 1910, not including 1910) together with winners of a 'Literature' prize in a later year (after 2004, including 2004)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>yr</th><th>subject</th><th>winner</th>\n",
       "                </tr>\n",
       "                <tr><td>2015</td><td>Literature</td><td>Svetlana Alexievich</td></tr><tr><td>2014</td><td>Literature</td><td>Patrick Modiano</td></tr><tr><td>2013</td><td>Literature</td><td>Alice Munro</td></tr><tr><td>2012</td><td>Literature</td><td>Mo Yan</td></tr><tr><td>2011</td><td>Literature</td><td>Tomas Tranströmer</td></tr><tr><td>2010</td><td>Literature</td><td>Mario Vargas Llosa</td></tr><tr><td>2009</td><td>Literature</td><td>Herta Müller</td></tr><tr><td>2008</td><td>Literature</td><td>Jean-Marie Gustave Le Clézio</td></tr><tr><td>2007</td><td>Literature</td><td>Doris Lessing</td></tr><tr><td>2006</td><td>Literature</td><td>Orhan Pamuk</td></tr><tr><td>2005</td><td>Literature</td><td>Harold Pinter</td></tr><tr><td>2004</td><td>Literature</td><td>Elfriede Jelinek</td></tr><tr><td>1909</td><td>Medicine</td><td>Theodor Kocher</td></tr><tr><td>1908</td><td>Medicine</td><td>Paul Ehrlich</td></tr><tr><td>1908</td><td>Medicine</td><td>Ilya Mechnikov</td></tr><tr><td>1907</td><td>Medicine</td><td>Alphonse Laveran</td></tr><tr><td>1906</td><td>Medicine</td><td>Camillo Golgi</td></tr><tr><td>1906</td><td>Medicine</td><td>Santiago Ramón y Cajal</td></tr><tr><td>1905</td><td>Medicine</td><td>Robert Koch</td></tr><tr><td>1904</td><td>Medicine</td><td>Ivan Pavlov</td></tr><tr><td>1903</td><td>Medicine</td><td>Niels Ryberg Finsen</td></tr><tr><td>1902</td><td>Medicine</td><td>Ronald Ross</td></tr><tr><td>1901</td><td>Medicine</td><td>Emil von Behring</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(nobel.filter((($\"yr\" < 1910) && ($\"subject\"===\"Medicine\")) ||\n",
    "              (($\"yr\" >= 2004) && ($\"subject\"===\"Literature\")))\n",
    "         .select($\"yr\", $\"subject\", $\"winner\")\n",
    "         .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 11. Umlaut\n",
    "\n",
    "Find all details of the prize won by PETER GRÜNBERG\n",
    "\n",
    "> _Non-ASCII characters_   \n",
    "> The u in his name has an umlaut. You may find this link useful <https://en.wikipedia.org/wiki/%C3%9C#Keyboarding>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>yr</th><th>subject</th><th>winner</th>\n",
       "                </tr>\n",
       "                <tr><td>2007</td><td>Physics</td><td>Peter Grünberg</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "nobel.filter(upper($\"winner\")===\"PETER GRÜNBERG\").showHTML()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 12. Apostrophe\n",
    "\n",
    "Find all details of the prize won by EUGENE O'NEILL\n",
    "\n",
    "> _Escaping single quotes_   \n",
    "> You can't put a single quote in a quote string directly. You can use two single quotes within a quoted string."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>yr</th><th>subject</th><th>winner</th>\n",
       "                </tr>\n",
       "                <tr><td>1936</td><td>Literature</td><td>Eugene O'Neill</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "nobel.filter(upper($\"winner\")===\"EUGENE O\\'NEILL\").showHTML()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 13. Knights of the realm\n",
    "\n",
    "Knights in order\n",
    "\n",
    "**List the winners, year and subject where the winner starts with Sir. Show the the most recent first, then by name order.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>winner</th><th>yr</th><th>subject</th>\n",
       "                </tr>\n",
       "                <tr><td>Sir Martin J. Evans</td><td>2007</td><td>Medicine</td></tr><tr><td>Sir Peter Mansfield</td><td>2003</td><td>Medicine</td></tr><tr><td>Sir Paul Nurse</td><td>2001</td><td>Medicine</td></tr><tr><td>Sir Harold Kroto</td><td>1996</td><td>Chemistry</td></tr><tr><td>Sir James W. Black</td><td>1988</td><td>Medicine</td></tr><tr><td>Sir Arthur Lewis</td><td>1979</td><td>Economics</td></tr><tr><td>Sir Nevill F. Mott</td><td>1977</td><td>Physics</td></tr><tr><td>Sir Bernard Katz</td><td>1970</td><td>Medicine</td></tr><tr><td>Sir John Eccles</td><td>1963</td><td>Medicine</td></tr><tr><td>Sir Frank Macfarlane Burnet</td><td>1960</td><td>Medicine</td></tr><tr><td>Sir Cyril Hinshelwood</td><td>1956</td><td>Chemistry</td></tr><tr><td>Sir Robert Robinson</td><td>1947</td><td>Chemistry</td></tr><tr><td>Sir Alexander Fleming</td><td>1945</td><td>Medicine</td></tr><tr><td>Sir Howard Florey</td><td>1945</td><td>Medicine</td></tr><tr><td>Sir Henry Dale</td><td>1936</td><td>Medicine</td></tr><tr><td>Sir Norman Angell</td><td>1933</td><td>Peace</td></tr><tr><td>Sir Charles Sherrington</td><td>1932</td><td>Medicine</td></tr><tr><td>Sir Venkata Raman</td><td>1930</td><td>Physics</td></tr><tr><td>Sir Frederick Hopkins</td><td>1929</td><td>Medicine</td></tr><tr><td>Sir Austen Chamberlain</td><td>1925</td><td>Peace</td></tr><tr><td>Sir William Ramsay</td><td>1904</td><td>Chemistry</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(nobel.filter($\"winner\".startsWith(\"Sir\"))\n",
    "     .select($\"winner\", $\"yr\", $\"subject\")\n",
    "     .orderBy($\"yr\".desc, $\"winner\").showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 14. Chemistry and Physics last\n",
    "\n",
    "The expression **subject IN ('Chemistry','Physics')** can be used as a value - it will be 0 or 1.\n",
    "\n",
    "**Show the 1984 winners and subject ordered by subject and winner name; but list Chemistry and Physics last.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>winner</th><th>subject</th>\n",
       "                </tr>\n",
       "                <tr><td>Richard Stone</td><td>Economics</td></tr><tr><td>Jaroslav Seifert</td><td>Literature</td></tr><tr><td>César Milstein</td><td>Medicine</td></tr><tr><td>Georges J.F. Köhler</td><td>Medicine</td></tr><tr><td>Niels K. Jerne</td><td>Medicine</td></tr><tr><td>Desmond Tutu</td><td>Peace</td></tr><tr><td>Bruce Merrifield</td><td>Chemistry</td></tr><tr><td>Carlo Rubbia</td><td>Physics</td></tr><tr><td>Simon van der Meer</td><td>Physics</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(nobel.withColumn(\"flg\", $\"subject\".isin(\n",
    "    List(\"Chemistry\", \"Physics\"): _*))\n",
    "  .filter($\"yr\"===1984)\n",
    "  .orderBy($\"flg\", $\"subject\", $\"winner\")\n",
    "  .select($\"winner\", $\"subject\")\n",
    "  .showHTML())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "spark.stop()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Scala",
   "language": "scala",
   "name": "scala"
  },
  "language_info": {
   "codemirror_mode": "text/x-scala",
   "file_extension": ".sc",
   "mimetype": "text/x-scala",
   "name": "scala",
   "nbconvert_exporter": "script",
   "version": "2.12.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
